********************************************************************************

* collapse_org.do
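* Collapses 1987-2017 CPS ORG data (extractorg_morg.dta.gz) by cohort and other dimensions
* (state, educational attainment, sex).
* Extracts made:
*	1) year-cohort level
*	2) year-cohort-state (current state)
*	3) year-cohort-attainment (5, 4, and 2 education groups)
*	4) year-cohort-attainment-state (5, 4, and 2 education groups)
*	5) year-cohort-attainment (2 education groups)-sex-state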

* Edited, RY, 3/7/2018 
* modified JR 4/30/2018: Use 2, 4, and 5 education groups.
*                        Remove merge to unemployment rate (now in combinecollapse)
* modified JR 1/9/2019:  Reduce variables to keep, and generate new weight that is limited
*                        to those with non-missing earnings.

* Environment setup.
* Ask -project- for build information. If that call succeeds, the project
* directory and do-file stub come from its r() results; if it fails, fall
* back to hard-coded defaults so the script can also be run stand-alone.
* doasproject records which of the two modes is active.
capture project, doinfo
if _rc == 0 {
	* project directory
	local pdir "`r(pdir)'"
	* do-file stub name
	local dofile "`r(dofile)'"
	* signature string (SMCL markup) intended for notes
	local sig {bind:{hi:[`dofile'.dta. RP : `dofile'.do, `c(current_date)']}}
	local doasproject 1
}
else {
	local pdir "~/GRscarring"
	local dofile "collapse_org"
	local doasproject 0
}

set more off

* Both directory aliases point at the project directory.
local rootdir "`pdir'"
local thisdir "`pdir'"

* Locations of intermediate (scratch) data and raw data.
local prepdata "`pdir'/scratch"
local rawdata "`pdir'/rawdata"


***************************************************************************************************************
*************************************
********** 0: LOAD DATA *************
*************************************

*** ORG DATA ***

* Path of the uncompressed ORG extract; the stored copy carries a .gz suffix.
local morg "`prepdata'/extractorg_morg.dta"

* In project mode, register the compressed extract as an input of this do-file.
if `doasproject' == 1 {
	project, uses("`morg'.gz")
}

* Decompress to a scratch .dta through the shell, load it, then delete the
* scratch copy so only the .gz stays on disk.
shell zcat `morg'.gz > `morg'
use `morg', clear
shell rm `morg'

*use if inlist(stfips, 1, 3, 7) & inlist(educ5,2,3) using  `prepdata'/extractorg.dta, clear


*** COHORT: ****
* Cohort is survey year minus age (the implied birth year).
generate cohort = year - age


************************************************
*********** 1: SAMPLE RESTRICTIONS *************
************************************************

* Keep only ages strictly between 15 and 81. Observations with missing age
* are dropped too, because a missing value never satisfies age < 81.
drop if !(age > 15 & age < 81)


************************************************
*********** 2: MAKE SOME VARIABLES *************
************************************************

*** Variables to collapse by:

* educ4: four-category attainment from educ92
*   1 = codes 0-8, 2 = code 9, 3 = codes 10-12, 4 = codes 13-16.
* Any other educ92 value (including missing) leaves educ4 missing.
gen educ4 = .
replace educ4 = 1 if inlist(educ92, 0, 1, 2, 3, 4, 5, 6, 7, 8)
replace educ4 = 2 if educ92 == 9
replace educ4 = 3 if inlist(educ92, 10, 11, 12)
replace educ4 = 4 if inlist(educ92, 13, 14, 15, 16)

* educ5: same as educ4, except codes 14-16 are split off from category 4
* into a fifth category (labelled "MA+" below), leaving code 13 as "BA".
gen educ5 = educ4
replace educ5 = 5 if inlist(educ92, 14, 15, 16)

* Observations whose educ92 maps to no category are removed from the sample.
drop if missing(educ5)
label define attain_l 1 "LTHS" 2 "HS" 3 "Some col." 4 "BA" 5 "MA+"
label values educ5 attain_l

* educ2: 1 for educ5 categories 4 and 5, 0 otherwise.
gen byte educ2 = inlist(educ5, 4, 5) if educ5 < .

* Weights restricted to observations with a non-missing log real wage.
* Stored as double: the default float type would round each weight to about
* 7 significant digits, so the restricted sums could disagree with the sums
* of the original weight variables even when no rw_l is missing.
gen double orgwgt_rw_l = orgwgt if rw_l < .
gen double earnwt_rw_l = earnwt if rw_l < .

rename gestfips fipsst

******** Sex ******
* Recode 2 -> 0 so the mean of sex is the share of observations with sex==1.
* NOTE(review): presumably 1 = male, 2 = female in the source data - confirm.
replace sex = 0 if sex == 2
tab sex

sort year cohort fipsst educ5

* Snapshot of the prepared micro data; each collapse below starts from it.
tempfile all
save `all'

*************************************
*********** 3: COLLAPSE *************
*************************************
/*Paid hourly (paidhre)
Wage (NBER def) (w_nber)
Wage (no topcode/OT adjustment) (w_no_no)
Hours used for wage calculations (hours_jr)
Wage (JR definition) (wage_jr)
Real wage (JR definition) (rw)
Real wage (CEPR method) (rw_cep)
Real wage (NBER definition) (rw_nber)
Occupation mean earnings (wage_occup) */

*local vlist "rw rw_l rw_nber rw_nber_l rwage_occup usualhoursi"
local vlist "rw_l"
local wlist "orgwgt orgwgt_rw_l earnwt earnwt_rw_l"

* One collapse per cell definition. Each cell name doubles as the name of the
* tempfile macro that holds the collapsed data, and by_<cell> lists its
* by-variables.
local by_yc     "year cohort"
local by_ycs    "year cohort fipsst"
local by_yca5   "year cohort educ5"
local by_yca5s  "year cohort fipsst educ5"
local by_yca4   "year cohort educ4"
local by_yca4s  "year cohort fipsst educ4"
local by_yca2   "year cohort educ2"
local by_yca2s  "year cohort fipsst educ2"
local by_yca2ss "year cohort fipsst educ2 sex"

* Only the year-cohort cell also carries the mean of sex.
* extra_<cell> is undefined (expands to nothing) for every other cell.
local extra_yc "sex"

* No weight is passed to collapse, so the means are unweighted, n_obs counts
* non-missing orgwgt, and each (rawsum) is the plain sum of a weight variable
* within the cell.
foreach cell in yc ycs yca5 yca5s yca4 yca4s yca2 yca2s yca2ss {
	use `all', clear
	collapse (mean) `vlist' `extra_`cell'' ///
		(count) n_obs=orgwgt (rawsum) `wlist', ///
		by(`by_`cell'')
	tempfile `cell'
	save ``cell''
}

  
********************************************
**** 5.  LABEL VARIABLES *****
********************************************
* For every collapsed cell file: replace the ORG weight sums with the
* earnings weight sums (kept under the orgwgt names), label the variables,
* and write the file back.
*
* The drop/rename statements used to sit after the loop's closing brace.
* There they changed only the last dataset left in memory and were never
* saved, and the next section reloads every file with -use, clear-, so they
* had no effect on any output. They now run inside the loop, before -save-.
* NOTE(review): output files no longer contain earnwt / earnwt_rw_l, and
* orgwgt / orgwgt_rw_l now hold the earnings weight sums - confirm that
* downstream code expects this.
foreach set in yc ycs yca5 yca5s yca4 yca4s yca2 yca2s yca2ss {
	use ``set'', clear

	drop orgwgt orgwgt_rw_l
	rename earnwt orgwgt
	rename earnwt_rw_l orgwgt_rw_l

	label var n_obs "Number of observations in cell"
	label var rw_l "Log of real wage (JR/CEPR definition)"
	label var orgwgt "Sum of ORG earnings weights (unweighted/raw)"
	label var orgwgt_rw_l "Sum of ORG earnings weights (non-missing rw_l)"

	save ``set'', replace
}

*************************************
****** 6: COMPRESS AND SAVE *********
*************************************

* Copy each cell's tempfile to a permanent collapse_org_<cell>.dta in the
* scratch directory. In project mode, also register the file as an output.
foreach col in yc ycs yca5 yca5s yca4 yca4s yca2 yca2s yca2ss {
	local outfile "`prepdata'/collapse_org_`col'.dta"
	use ``col'', clear
	save "`outfile'", replace
	* gzip step is disabled; the two lines below are kept for reference.
	*! gzip -f `prepdata'/`dofile'_`col'.dta
	*project, creates("`prepdata'/`dofile'_`col'.dta.gz")
	if `doasproject' == 1 {
		project, creates("`outfile'")
	}
}


* end of do file *


